library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.2
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.1.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.2
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(limma)
## Warning: package 'limma' was built under R version 4.1.1
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.1.2
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.1.2
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(data.table)
## Warning: package 'data.table' was built under R version 4.1.2
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(formattable)
## Warning: package 'formattable' was built under R version 4.1.2
## 
## Attaching package: 'formattable'
## The following object is masked from 'package:plotly':
## 
##     style
#Data Files and prep work
source("../../../lib/DataProccess.R")
source("../../../lib/NormFuncs.R")
source("../../../lib/OutlierDetectionFuncs.R")
source("../../../lib/DataPathName.R")
BaseDir <- params$BaseDir#get the root of the directory where the data is stored

The steps to this analysis are: Remove outliers Remove trend normalize residual find constant variance

First we look at Madison data

LIMSFullDF <- ParseData(LIMSWastePath(BaseDir))%>%
  filter(Site == "Madison")%>%
  select(Date, Site, N1, BCoV , N2 , PMMoV,
         FlowRate,temperature,equiv_sewage_amt)
ErrorMarkedDF <- LIMSFullDF%>%#
    mutate(FlagedOutliers = IdentifyOutliers(N1, Action = "Flag"),
           #Manual flagging that method misses due to boundary effect with binning
           FlagedOutliers = ifelse(Date == mdy("01/26/2021") | 
                                   Date == mdy("01/27/2021") | 
                                   Date == mdy("01/23/2022"),
                                   TRUE, FlagedOutliers),
           NoOutlierVar = ifelse(FlagedOutliers, NA, N1))

#Split N1 into outlier and non outlier for next ggplot
OutLierPlotDF <- ErrorMarkedDF%>%
  mutate(OutlierN1 = ifelse(FlagedOutliers,N1,NA))%>%
           mutate(N1 := NoOutlierVar)

OutLierPlotObject <- OutLierPlotDF%>%
  filter(!(is.na(N1)&is.na(OutlierN1)))%>%
  ggplot(aes(x=Date))+#Data depends on time 
  geom_line(aes(y=N1,
                color="N1", 
                info = N1))+#compares Var to Cases
  geom_point(aes(y=OutlierN1,
                 color="OutlierN1",
                 info = OutlierN1))
## Warning: Ignoring unknown aesthetics: info

## Warning: Ignoring unknown aesthetics: info
ggplotly(OutLierPlotObject,tooltip=c("info","Date"))
UpdatedDF <- ErrorMarkedDF%>%
  select(-N1)%>%
  rename(N1 = NoOutlierVar)
UpdatedDF$loessN1 <- loessFit(y=(UpdatedDF$N1), 
                      x=UpdatedDF$Date, #create loess fit of the data
                      span=.05, #span of .2 seems to give the best result, not rigorously chosen
                      iterations=2)$fitted#2 iterations remove some bad patterns

SLDLoessGraphic <- UpdatedDF%>%
  NoNa("loessN1")%>%
  ggplot(aes(x=Date))+
  geom_line(aes(y=N1,
                color="N1",
                info = N1),
            alpha=.2)+
  geom_line(aes(y = loessN1, 
                color="loessN1" ,
                info = loessN1))
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(ColumnNames)` instead of `ColumnNames` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
## Warning: Ignoring unknown aesthetics: info

## Warning: Ignoring unknown aesthetics: info
ggplotly(SLDLoessGraphic,tooltip=c("info","Date"))
DetrendedDF <- UpdatedDF%>%
  mutate(DetrendedN1 = N1 - loessN1)


DetrendLoessGraphic <- DetrendedDF%>%
  NoNa("DetrendedN1")%>%
  ggplot(aes(x=Date))+
  geom_line(aes(y=DetrendedN1,
                color="DetrendedN1",
                info = DetrendedN1))
## Warning: Ignoring unknown aesthetics: info
ggplotly(DetrendLoessGraphic,tooltip=c("info","Date"))
mean(DetrendedDF$DetrendedN1, na.rm=TRUE)
## [1] 12278.31
NormRestruct <- function(Norm){
  return(mean(Norm,na.rm=TRUE)/Norm)
}
TestNorm <- function(DF,Base,Norm){
  ResidDetrendLoessGraphic <- ResidDetrendedDF%>%
  NoNa(Norm)%>%
  ggplot(aes(x=Date))+
  geom_line(aes(y=!!sym(Base),
                color=Base,
                info = !!sym(Base)))+
  geom_line(aes(y=!!sym(Norm),
                color=Norm,
                info = !!sym(Norm)))


ggplotly(ResidDetrendLoessGraphic,tooltip=c("info","Date"))
}

ResidDetrendedDF <- DetrendedDF%>%
  mutate(VarCompN1 = DetrendedN1**2,
         N1NormedResid = VarCompN1*NormRestruct(loessN1),
         sqrtNormedResid = VarCompN1*NormRestruct(sqrt(loessN1)),
         logNormedResid = VarCompN1*NormRestruct(log(loessN1)),
         xlogNormedResid = VarCompN1*NormRestruct(loessN1*log(loessN1)),
         WeirdNormedResid = VarCompN1*NormRestruct(sqrt(loessN1)*log(loessN1)))
ResidDetrendLoessGraphic <- ResidDetrendedDF%>%
  NoNa("DetrendedN1")%>%
  ggplot(aes(x=Date, y=DetrendedN1^2,
                info = DetrendedN1))+
  geom_point()+
  geom_smooth(span = .1)


ggplotly(ResidDetrendLoessGraphic,tooltip=c("info","Date"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'